
* Number of children (by birth order) that get their own set of wide-form
* columns in the "children, wide form" section of this file.
scalar nb_keep = 7

*** hhheadhukou: hukou and agricultural employment of the household head,
*** for households that have exactly one head

use ../data/census1990individual.dta, clear

gen hhh = (hstatus == 1) // indicator for household head
tab hhh

* Household-level number of heads. egen total() puts the same count on every
* row of the household. (gen ... = sum() would be a running sum, whose value
* on a given row depends on the unspecified row order within hhcode.)
bys hhcode: egen numhhh = total(hhh)
keep if numhhh == 1  // households with exactly one head
keep if hstatus == 1 // the head's own row -> one row per hhcode
gen hhheadhukou = hukou
gen hhheadagriemp = empAgri
keep hhcode hhheadhukou hhheadagriemp
count // 2,923,140 was recorded when heads were picked with a running sum; re-check (can only be <=)
save ../temp/census1990_hhheadhukou,replace


*** Family structure: per-household head counts by family status and age group

use ../data/census1990individual, clear

tab hstatus // 1 hhhead, 2 spouse, 3 child, 4 grandchild, 5 parent, 6 grandparent, 7 other relative, 8 non-relative

tab fstatus // 1 "father" 2 "mother" 3 "child" 4 "grandchild" 5 "grandparent" 6 "grandgrandparent" 7 "other relatives" 8 "other non-relatives"

* One 0/1 indicator per fstatus category (fstatus1, fstatus2, ...).
* NOTE(review): tab, gen() numbers the indicators by the rank of the values
* that actually occur, so fstatusK means "code K" only if every code 1..8 is
* present in the data - confirm from the tabulation above.
tab fstatus, gen(fstatus)

sum fstatus*

* young and old
count if missing(age)

gen young = age < 15

* Stata orders missing values above every number, so a bare `age > 64` would
* flag persons with missing age as old; exclude them explicitly.
gen old = age > 64 & !missing(age)

* Collapse to one row per household: sums of the indicators give head counts,
* and nperson counts the rows with non-missing hstatus.
collapse (sum) nfather = fstatus1 nmother = fstatus2 ///
nchild = fstatus3 ngrandchild = fstatus4 ngrandparent = fstatus5 ///
ngrandgrandparent = fstatus6 notherrel = fstatus7 nothernonrel = fstatus8 ///
(sum) nyoung = young nold = old ///
(count) nperson = hstatus ///
, by(hhcode) fast

* residual category: great-grandparents, other relatives, non-relatives
gen nother = ngrandgrandparent + notherrel + nothernonrel

sum n*

save ../temp/census1990_famstruc, replace


*** parents: wide father/mother characteristics and the mother's birth history

use ../data/census1990individual, clear

* attach the household counts only to inspect the father/mother cross-tab
merge m:1 hhcode using ../temp/census1990_famstruc, nogenerate

tabulate nfather nmother // fine

* restrict to parents: fstatus 1 = father, 2 = mother
keep if inlist(fstatus, 1, 2)

* mother's fertility variables; the global is reused by the children section
global BVARS "chbornm chbornf chsurvm chsurvf chborn chsurv chdecem chdecef chdece chborn_maleratio chsurv_maleratio chdece_maleratio chbornLY"

preserve
	* reshape key: P1 = father, P2 = mother (only these two remain after the keep)
	generate pcode = cond(fstatus == 1, "P1", "P2")

	* characteristics that become <name>P1 / <name>P2 columns
	local pvars "age birthy birthm male han ethn hukou empAgri localhukou eduy emp industry occupation unempstatus "
	keep prov hhcode pcode `pvars'
	reshape wide `pvars', i(hhcode) j(pcode) string

	save ../temp/census1990_parent, replace
restore

* mother's birth information: mothers only, fertility variables suffixed P2
keep if fstatus == 2

foreach bvar of varlist $BVARS {
	rename `bvar' `bvar'P2
}

keep hhcode *P2

save ../temp/census1990_motherbirth, replace
	

*** children: one row per child, with birth order and multiple-birth flags

use ../data/census1990individual, clear

keep if fstatus == 3 // children

count if missing(birthy)
count if missing(birthm)

tab birthm,m

* child birth order within the household, by birth year then month.
* `stable` keeps children tied on (birthy, birthm) - i.e. twins - in their
* input-file order; a plain sort breaks ties arbitrarily, which would make
* birthorder (and everything keyed on it) differ from run to run.
sort hhcode birthy birthm, stable
by hhcode: gen birthorder = _n

tab birthorder,m

* multiple births: children of one household sharing birth year and month.
* NOTE(review): by-groups treat missing values as equal, so children with
* missing birthy/birthm in the same household would be grouped together here;
* the missing counts above are the check for that.
* if twin sibling
bys hhcode birthy birthm: gen twin = (_N == 2)

bys hhcode birthy birthm: gen triplet = (_N == 3)

bys hhcode birthy birthm: gen quadruplet = (_N == 4)

* twin code: 1/2 within a twin pair, following birthorder; missing for non-twins
sort hhcode birthy birthm birthorder
by hhcode birthy birthm: gen twcode = _n if twin == 1
label var twcode "Twin Code"

tab twcode,m

* fertility variables ($BVARS is defined in the parents section) are not kept for children
drop $BVARS

save ../temp/census1990_childlong, replace


*** household level, child aggregate

* `use` takes the option clear to discard the data in memory; replace is not
* a valid option for it and stops the do-file.
use ../temp/census1990_childlong, clear

* number of children
tab male,m
bys hhcode: gen nchild = _N
bys hhcode: egen nmale = total(male)

* number of twins (counts children, so one twin pair contributes 2)
by hhcode: egen ntwin = total(twin)
gen maletwin = male*twin
by hhcode: egen nmaletwin = total(maletwin)

* number of triplets (again counted in children)
by hhcode: egen ntriplet = total(triplet)

* order of twinning
gen twinorder = birthorder if twin == 1

tab twinorder,m // now still twin's birthorder, later collapse

* collapse to hh level: the household constants are taken from the first row,
* twinorder becomes the lowest birth order among the twins
keep hhcode nchild nmale ntwin nmaletwin ntriplet twinorder age birthy
collapse (first) nchild nmale ntwin nmaletwin ntriplet (min) twinorder ///
(min) minchildage = age (max) maxchildage = age ///
(min) minchildbirthy = birthy (max) maxchildbirthy = birthy ///
, by(hhcode) fast

* clear the variable labels
foreach var of varlist _all {
label var `var' ""
}

* sex composition of the twin pair, defined only when exactly two children are twins:
* 1 = both male, 2 = both female, 3 = one of each
g twinsex = 1 if nmaletwin == 2 & ntwin == 2
replace twinsex = 2 if nmaletwin == 0 & ntwin == 2
replace twinsex = 3 if nmaletwin == 1 & ntwin == 2

tab ntriplet // 52 hh with triplets

* household has at least one twin pair
g twinhh = ntwin >= 2

save ../temp/census1990_childcomposition, replace


*** children, wide form: one row per household, columns <var>C1 ... <var>C<nb_keep>

* `use` takes the option clear to discard the data in memory; replace is not
* a valid option for it and stops the do-file.
use ../temp/census1990_childlong, clear

* keep the first nb_keep children by birth order; scalar() makes sure the
* scalar is read even if a variable name could match nb_keep
keep if birthorder <= scalar(nb_keep)

* reshape key: C1, C2, ... following birth order
gen ccode = "C" + string(birthorder)

* child characteristics spread into the wide columns
local cvars "age birthy birthm male han hukou localhukou eduy edus emp "

keep hhcode ccode `cvars'
reshape wide `cvars', i(hhcode) j(ccode) string

save ../temp/census1990_childwide, replace


*** Merge household level data

* start from the family structure counts
use ../temp/census1990_famstruc, clear

* add, in this order: hh level raw data, parents information,
* hukou of household head, mother's fertility
foreach piece in hhraw parent hhheadhukou motherbirth {
	merge 1:1 hhcode using ../temp/census1990_`piece', nogenerate
}

* hh level child aggregate; households found only in the master have no
* child rows, so their counts are set to zero
merge 1:1 hhcode using ../temp/census1990_childcomposition
foreach cnt in nchild nmale ntwin ntriplet {
	replace `cnt' = 0 if _merge == 1 // for nchild: right, 0 zero change
}
drop _merge

* child, wide form
merge 1:1 hhcode using ../temp/census1990_childwide, nogenerate

* drop collective hh (rows whose hhdomestic flag is zero)
drop if hhdomestic == 0


* check
tabulate nchild nmale, missing

generate nfemale = nchild - nmale
tabulate nfemale, missing

generate maleratio = nmale/nchild
summarize maleratio

* all children coreside and none died: the co-resident counts match the
* mother's reported surviving children, and surviving equals ever born
generate sampleChCoreside = nchild == chsurvP2 & chsurvP2 == chbornP2 ///
	& nmale == chsurvmP2 & !missing(chsurvP2, chsurvmP2)

* mother aged 15 to under 65
local workage "ageP2 >= 15 & ageP2 < 65"

tabulate sampleChCoreside
tabulate sampleChCoreside if `workage' // 61%

* more co-resident children than the mother reports as surviving
generate sampleChCoresideMoreThanBirth = `workage' ///
	& (nchild > chsurvP2 | nmale > chsurvmP2 | nfemale > chsurvfP2)


* child's birth year minus mother's birth year, for each kept birth order
local nkids = nb_keep
forvalues k = 1/`nkids' {
	generate age`k'birthP2 = birthyC`k' - birthyP2 if sampleChCoreside
}


*** additional variables

* both parents Han (product of the father's and mother's han variables)
generate hanP = hanP1 * hanP2

*** samples

* twinning samples: non-twin households with at least k children, or twin
* households whose twins arrive at birth order k
forvalues k = 1/5 {
	generate sample`k'plus = (twinhh == 0 & nchild >= `k') | (twinhh == 1 & twinorder == `k')
}

order hhcode prov hhhead*

* clear every variable label
foreach v of varlist * {
	label variable `v' ""
}

compress

save ../data/census1990hh, replace

*** Merge child level data

use ../temp/census1990_childlong, clear

* attach household variables to every child; households without a child row
* (using-only observations) are not kept
merge m:1 hhcode using ../data/census1990hh, keep(master match) nogenerate

* clear every variable label
foreach v of varlist * {
	label variable `v' ""
}

compress

save ../data/census1990child, replace


*** erase temporary files to release space

foreach tmp in childcomposition childlong childwide famstruc hhheadhukou hhraw motherbirth parent {
	erase ../temp/census1990_`tmp'.dta
}


